# !diagnostics off

# Subset of the men's file (m), keyed by mspqno, used below to cross-check
# the women's reports: marriage years (q13by*), how each marriage ended
# (q13f*), the q13g* items, move-in years (q16a*) and q18a/q18b.
husb <- m %>%
  # select(mspqno, q13bm1st, q13by1st, q13bm2nd, q13by2nd, q13bm3rd, q13by3rd, q13bm4th, q13by4th)
  select(
    mspqno,
    # q13a,
    q13by1st, q13by2nd, q13by3rd, q13by4th,
    # q13c1st, q13c2nd, q13c3rd, q13c4th,
    q13f1st, q13f2nd, q13f3rd, q13f4th,
    q13g1st, q13g2nd, q13g3rd, q13g4th,
    q16a1st, q16a2nd, q16a3rd, q16a4th,
    q18a, q18b
  ) %>%
  # Add 1900 to every marriage year so it can be compared with MarriageYear
  # (presumably the raw values are two-digit years -- TODO confirm).
  mutate(
    q13by1st = q13by1st + 1900,
    q13by2nd = q13by2nd + 1900,
    q13by3rd = q13by3rd + 1900,
    q13by4th = q13by4th + 1900
  )

# For every woman (id) keep her highest-numbered relationship from fMovedIn,
# attach her husband's answers (husb) and work out
#   HusbMarrMatch: which of the husband's marriages (1-4) has the same year as
#                  her MarriageYear; 0 when none matches, NA when MarriageYear
#                  is missing and a non-missing husband year is reached.
#   DidNotEnd:     1 when the matched marriage has q13f == 1, 0 when it is
#                  matched with any other q13f value, NA when nothing matched.
MarriageLast <-
  fMovedIn %>%
  # MarriageYear %>%
  group_by(id) %>%
  # select(-MovedInYear, -MovedInMonth, -MarriageMonth) %>%
  filter(RelationshipNum == max(RelationshipNum, na.rm = TRUE)) %>%
  # Drop the grouping once the per-woman filter is done so that the result
  # (and everything derived from it) is a plain, ungrouped table.
  ungroup() %>%
  left_join(husb, by = c("id" = "mspqno")) %>%
  mutate(
    # YearMatch = ifelse(MarriageYear==q13by1st & !is.na(q13by1st), 1,
    #                         ifelse(MarriageYear==q13by2nd & !is.na(q13by2nd), 1,
    #                                ifelse(MarriageYear==q13by3rd & !is.na(q13by3rd), 1,
    #                                       ifelse(MarriageYear==q13by4th & !is.na(q13by4th), 1, 0)))),
    # The first matching husband marriage wins; a missing husband year counts
    # as "no match" for that slot and falls through to the next one.
    HusbMarrMatch = ifelse(
      MarriageYear == q13by1st & !is.na(q13by1st), 1,
      ifelse(
        MarriageYear == q13by2nd & !is.na(q13by2nd), 2,
        ifelse(
          MarriageYear == q13by3rd & !is.na(q13by3rd), 3,
          ifelse(MarriageYear == q13by4th & !is.na(q13by4th), 4, 0)
        )
      )
    ),
    # Parentheses only spell out the existing precedence (& binds before |).
    DidNotEnd = ifelse(
      (HusbMarrMatch == 1 & q13f1st == 1 & !is.na(q13f1st)) |
        (HusbMarrMatch == 2 & q13f2nd == 1 & !is.na(q13f2nd)) |
        (HusbMarrMatch == 3 & q13f3rd == 1 & !is.na(q13f3rd)) |
        (HusbMarrMatch == 4 & q13f4th == 1 & !is.na(q13f4th)),
      1,
      ifelse(HusbMarrMatch == 0, NA, 0)
    )
  )

# Quick look at the matched marriages that the husband reports as ended.
table(MarriageLast$DidNotEnd)
MarriageLast %>%
  filter(DidNotEnd == 0) %>%
  select(-RelationshipNum)

# id 1515, 148
fMovedIn %>% filter(id == 1515)

# Number of women without a year match whose id appears in the men's file.
# which() drops rows where HusbMarrMatch is NA; a bare logical subscript would
# turn them into NA ids, and NA %in% m$mspqno is TRUE whenever mspqno itself
# contains an NA.
sum(MarriageLast$id[which(MarriageLast$HusbMarrMatch == 0)] %in% m$mspqno)
# Same count, taken from the women that do appear in the men's file.
sum(MarriageLast$HusbMarrMatch[MarriageLast$id %in% m$mspqno] == 0, na.rm = TRUE)
# Women in the men's file for whom HusbMarrMatch is missing.
sum(is.na(MarriageLast$HusbMarrMatch[MarriageLast$id %in% m$mspqno]))


# Rows without a year match (filter() already discards NA comparisons).
MarrNoMatch <- MarriageLast %>% filter(HusbMarrMatch == 0)
sum(MarrNoMatch$id %in% m$mspqno)


# MarriageLast$DidNotEnd[MarriageLast$HusbMarrMatch==0] == 
# sum(!is.na(husb$q12a[is.na(husb$q13a)]))

# m = read_csv("Australia/data/AustralianFamilyProject/ADA_00497_AFP_M_STATA.csv")
# m = m %>% filter(mspqno %in% x$id)
# 
# # Get id variable for m from f file
# m = 
#   x %>% 
#   select(id, spqno) %>% 
#   rename(mid=spqno) %>%
#   right_join(m, by=c("id"="spqno")) %>% 
#   mutate(id=NULL)
#   # rename(id=mid)

# Long-format marriage tables built from the men's file: one row per man (mid)
# and marriage slot. MarrNum is the digit taken from the 1st..4th column
# suffix; gather(na.rm = TRUE) drops slots with a missing value.

# Year of each marriage (q13by*).
mMarriageYear <- m %>%
  select(mid, q13by1st, q13by2nd, q13by3rd, q13by4th) %>%
  gather(MarrNum, MarriageYear, q13by1st:q13by4th, na.rm = TRUE) %>%
  mutate(
    # Character 6 of e.g. "q13by1st" is the marriage number.
    MarrNum = as.integer(substr(MarrNum, 6, 6)),
    mRelationshipType = "Marriage"
  )

# Month of each marriage (q13bm*).
mMarriageMonth <- m %>%
  select(mid, q13bm1st, q13bm2nd, q13bm3rd, q13bm4th) %>%
  gather(MarrNum, MarriageMonth, q13bm1st:q13bm4th, na.rm = TRUE) %>%
  mutate(
    # Character 6 of e.g. "q13bm1st" is the marriage number.
    MarrNum = as.integer(substr(MarrNum, 6, 6)),
    mRelationshipType = "Marriage"
  )

# How each marriage ended (q13f*); NotOver is 1 when the answer is 1, else 0.
mMarriageOver <- m %>%
  select(mid, q13f1st, q13f2nd, q13f3rd, q13f4th) %>%
  gather(MarrNum, EndedHow, q13f1st:q13f4th, na.rm = TRUE) %>%
  mutate(
    # Character 5 of e.g. "q13f1st" is the marriage number.
    MarrNum = as.integer(substr(MarrNum, 5, 5)),
    NotOver = ifelse(EndedHow == 1, 1, 0),
    mRelationshipType = "Marriage"
  )
# Combine year, month and ending information per man and marriage slot.
# The join keys are written out (they are exactly the columns the three tables
# share) so that a column added upstream cannot silently change the join and
# dplyr does not have to guess and print a "Joining with" message.
mMarr <- mMarriageYear %>%
  full_join(mMarriageMonth, by = c("mid", "MarrNum", "mRelationshipType")) %>%
  full_join(mMarriageOver, by = c("mid", "MarrNum", "mRelationshipType"))

# Marriages flagged NotOver == 1 with a year or a month value below 99
# (99 presumably being the missing/unknown code -- TODO confirm).
mMarrNotOver <- mMarr %>%
  filter(NotOver == 1 & (MarriageYear < 99 | MarriageMonth < 99)) %>%
  select(mid, mRelationshipType, MarriageYear, MarriageMonth) %>%
  rename(mYear = MarriageYear, mMonth = MarriageMonth)
  # left_join(m %>% select(mid, q25c, q25ay, q25am))

# Knowing the month of leaving home does not help
# because there are only two men who report not knowing
# their month of marriage for an ongoing marriage.
# Only one of these left home for marriage.
# The other left for reason 99 (either no answer or write-in).
# The one who left for marriage was on his third marriage at the time of the survey.
#
# mMarr is built only from mid and the q13b*/q13f* columns, so it has no q25c
# column; q25c is therefore attached from m before tabulating.
# NOTE(review): assumes m holds q25c and one row per mid -- confirm.
mMarr %>%
  filter(MarriageMonth == 99) %>%
  left_join(m %>% select(mid, q25c), by = "mid") %>%
  with(table(q25c, MarrNum))

# Long-format tables for the men's non-marriage relationships (q16*): one row
# per man (mid) and relationship slot. RelationshipNum is the digit taken from
# the 1st..4th column suffix; gather(na.rm = TRUE) drops missing slots.

# Year the couple moved in together (q16a*).
mMovedInYear <- m %>%
  dplyr::select(mid, q16a1st, q16a2nd, q16a3rd, q16a4th) %>%
  gather(RelationshipNum, MovedInYear, q16a1st:q16a4th, na.rm = TRUE) %>%
  mutate(
    RelationshipNum = as.integer(substr(RelationshipNum, 5, 5)),
    mRelationshipType = "NonMarriage"
  )

# Whether the relationship is over (q16c*); NotOver is the raw code minus 1.
mRelationshipOver <- m %>%
  dplyr::select(mid, q16c1st, q16c2nd, q16c3rd, q16c4th) %>%
  gather(RelationshipNum, Over, q16c1st:q16c4th, na.rm = TRUE) %>%
  mutate(
    RelationshipNum = as.integer(substr(RelationshipNum, 5, 5)),
    NotOver = Over - 1,
    mRelationshipType = "NonMarriage"
  )

# Duration in years (q16dy*); code 98 is recoded to 0.
mRelationshipDurationY <- m %>%
  dplyr::select(mid, q16dy1st, q16dy2nd, q16dy3rd, q16dy4th) %>%
  gather(RelationshipNum, DurationY, q16dy1st:q16dy4th, na.rm = TRUE) %>%
  mutate(
    RelationshipNum = as.integer(substr(RelationshipNum, 6, 6)),
    mRelationshipType = "NonMarriage",
    DurationY = ifelse(DurationY == 98, 0, DurationY)
  )

# Duration in months (q16dm*); code 98 is recoded to 0.
mRelationshipDurationM <- m %>%
  dplyr::select(mid, q16dm1st, q16dm2nd, q16dm3rd, q16dm4th) %>%
  gather(RelationshipNum, DurationM, q16dm1st:q16dm4th, na.rm = TRUE) %>%
  mutate(
    RelationshipNum = as.integer(substr(RelationshipNum, 6, 6)),
    mRelationshipType = "NonMarriage",
    DurationM = ifelse(DurationM == 98, 0, DurationM)
  )
# Combine move-in year, durations and the over/not-over flag per man and
# relationship slot. The join keys are written out (they are exactly the
# columns the four tables share) so that a column added upstream cannot
# silently change the join and no "Joining with" message is printed.
mRel <- mMovedInYear %>%
  full_join(
    mRelationshipDurationY,
    by = c("mid", "RelationshipNum", "mRelationshipType")
  ) %>%
  full_join(
    mRelationshipDurationM,
    by = c("mid", "RelationshipNum", "mRelationshipType")
  ) %>%
  full_join(
    mRelationshipOver,
    by = c("mid", "RelationshipNum", "mRelationshipType")
  )

# Relationships flagged NotOver == 1 with at least one of move-in year,
# duration in years or duration in months below 99
# (99 presumably being the missing/unknown code -- TODO confirm).
mRelNotOver <-
  mRel %>%
  filter(NotOver == 1 & (MovedInYear < 99 | DurationY < 99 | DurationM < 99)) %>%
  select(mid, mRelationshipType, MovedInYear, DurationY, DurationM) %>%
  rename(mYear = MovedInYear)
  # left_join(m %>% select(mid, monrec, dayrec, mondel, daydel)) %>%
  # mutate(yrrec = ifelse(monrec>12, 87, 86))

# Stack the men's ongoing marriages on top of their ongoing non-marriage
# relationships; columns present in only one table are filled with NA.
mRelMarr <- bind_rows(mMarrNotOver, mRelNotOver)

# Everybody gets one: the number of distinct men should equal the row count.
length(unique(mRelMarr$mid))
length(mRelMarr$mid)

# ggplot(mRelMarr, aes(x=Year, group=RelationshipType, color=RelationshipType)) +
#   geom_density()


# Want non-marriage relationships even though they do not give months
# because moves after moving in together could be imputed to women


# # Can get 85% of MovedInYear values right by offsetting date received by a month
# corTemp = tibble(
#   MonOffset = seq(0,5,.01),
#   CorrectProp = NA
# )
# for(i in 1:nrow(corTemp)){
#   mRelTemp = mRelNotOver %>% 
#     mutate(MovedInYearCalc = floor(yrrec-DurationY+(monrec-DurationM-corTemp$MonOffset[i])/12))
#   corTemp$CorrectProp[i] = sum(mRelTemp$MovedInYear==mRelTemp$MovedInYearCalc)/length(mRelTemp$MovedInYearCalc)
#   # corTemp$cor[i] = cor(mRelTemp$MovedInYear, mRelTemp$MovedInYearCalc)
# }
# ggplot(corTemp, aes(x=MonOffset, y=CorrectProp))+
#   geom_point(alpha=.1)

# Women's relationships flagged NotOver == 1, with the spouse questionnaire
# number (spqno) looked up from x. The join keys are left for dplyr to infer
# from the shared column names, exactly as before.
fMovedInNotOver <- fMovedIn %>%
  filter(NotOver == 1) %>%
  left_join(select(x, id, spqno))
  # select(id, RelationshipType, MarriageYear, MarriageMonth)%>%
  # rename(Year = MarriageYear, Month=MarriageMonth)

# Put the man's report of his ongoing relationship next to the woman's,
# matching her spqno to his mid, then drop the now-constant NotOver flag.
mfMovedInNotOver <-
  left_join(fMovedInNotOver, mRelMarr, by = c("spqno" = "mid")) %>%
  select(-NotOver)

# There are 75 cases of disagreement in months of marriage/move
with(mfMovedInNotOver, sum(MarriageMonth != mMonth, na.rm = TRUE))
# All of them are marriages
na.omit(
  mfMovedInNotOver$mRelationshipType[
    mfMovedInNotOver$MarriageMonth != mfMovedInNotOver$mMonth
  ]
)
# There is only one case where the woman did not remember the marriage date and the man did
with(mfMovedInNotOver, sum(MarriageMonth == 99 & !is.na(mMonth), na.rm = TRUE))
# Cross-tabulate her marriage month and her move-in month against his month
table(mfMovedInNotOver$MarriageMonth, mfMovedInNotOver$mMonth)
table(mfMovedInNotOver$MovedInMonth, mfMovedInNotOver$mMonth)
# There are 8 cases where moved in after marriage but did not remember marriage month
table(mfMovedInNotOver$MovedInMonth, mfMovedInNotOver$MarriageMonth)
# Of these, one man remembered the month
mfMovedInNotOver %>%
  filter(MovedInMonth == 98 & MarriageMonth == 99) %>%
  with(table(MovedInMonth, mMonth))
# It was a marriage
filter(mfMovedInNotOver, MovedInMonth == 98 & MarriageMonth == 99)
# Which lines to change: the woman has MovedInMonth 98 and MarriageMonth 99
# while the man reports a month below 98.
mMonthChangeTemp <- with(
  mfMovedInNotOver,
  MovedInMonth == 98 & MarriageMonth == 99 & !is.na(mMonth) & mMonth < 98
)
# Index through which(): the mask is NA wherever MovedInMonth or MarriageMonth
# is missing and the other conditions hold, and R refuses NA subscripts in an
# assignment whose replacement has more than one element.
mfMovedInNotOver$MarriageMonth[which(mMonthChangeTemp)] <-
  mfMovedInNotOver$mMonth[which(mMonthChangeTemp)]
# This imputation does not matter because the woman was born in 1937
x$BornYear[which(x$id == 1995)]

# Her reported marriage month against his, with heavy transparency so that
# overplotted points show up as darker spots.
ggplot(
  data = mfMovedInNotOver,
  mapping = aes(x = MarriageMonth, y = mMonth)
) +
  geom_point(alpha = 0.1)

# Everybody gets one: the number of distinct women should equal the row count.
length(unique(fMovedInNotOver$id))
length(fMovedInNotOver$id)

# One spqno value was missing from f file
# Solved in setup files
# Each line counts how many non-missing ids on one side appear among the
# non-missing ids on the other side.

# Women's ids found among the men's spouse numbers.
sum(
  x$id[!is.na(x$id)] %in%
    m$mspqno[!is.na(m$mspqno)]
)
# Men's ids found among the women's spouse numbers.
sum(
  m$id[!is.na(m$id)] %in%
    x$spqno[!is.na(x$spqno)]
)
# Women's spouse numbers found among the men's ids.
sum(
  x$spqno[!is.na(x$spqno)] %in%
    m$id[!is.na(m$id)]
)
# Men's spouse numbers found among the women's ids.
sum(
  m$mspqno[!is.na(m$mspqno)] %in%
    x$id[!is.na(x$id)]
)
